In [1]:
# IPython shell escape: print the contents of the configuration module
# (dbpedia_config.py) so the settings used below are visible in the notebook.
!cat dbpedia_config.py
In [2]:
import subprocess
import os
import dbpedia_config
In [3]:
# Folder that receives every download, and the language editions to fetch,
# both taken from the configuration module.
target, languages = dbpedia_config.DATA_FOLDER, dbpedia_config.LANGUAGES
In [4]:
# Ontology
# Note: up to and including the 2014 release this file was bzip-compressed.
# The download is skipped when the target file is already present.
ontology_path = '{0}/dbpedia.owl'.format(target)
if not os.path.exists(ontology_path):
    ontology_cmd = [
        '/usr/bin/wget',
        'http://downloads.dbpedia.org/2015-10/dbpedia_2015-10.owl',
        '-O', ontology_path,
    ]
    subprocess.call(ontology_cmd, stdout=None, stderr=None)
In [5]:
# Base URI of the per-language ("core-i18n") dumps of the DBpedia 2015-10
# release; every per-language download below is built from it.
# NOTE(review): this comment previously cited
# http://wiki.dbpedia.org/Downloads2015-04, which does not match the 2015-10
# release used here -- presumably the 2015-10 downloads page was meant; confirm.
db_uri = 'http://downloads.dbpedia.org/2015-10/core-i18n'
In [16]:
# Per-language dumps to fetch for every configured language edition.
# Each one is stored as <target>/<dataset>_<lang>.ttl.bz2 and is only
# downloaded when that file does not exist yet.
dataset_names = (
    'instance_types',
    'interlanguage_links',
    'labels',
    'mappingbased_literals',
    'mappingbased_objects',
)

for lang in languages:
    for dataset in dataset_names:
        dump_name = '{0}_{1}.ttl.bz2'.format(dataset, lang)
        local_path = '{0}/{1}'.format(target, dump_name)
        if os.path.exists(local_path):
            # Already downloaded on a previous run.
            continue
        remote_url = '{0}/{1}/{2}'.format(db_uri, lang, dump_name)
        subprocess.call(['/usr/bin/wget', remote_url, '-O', local_path],
                        stdout=None, stderr=None)
In [7]:
# DBpedia gender dataset, see http://oldwiki.dbpedia.org/Datasets/NLP#h172-7
# Fetched once; an existing local copy is left untouched.
dbpedia_gender = 'http://wifo5-04.informatik.uni-mannheim.de/downloads/datasets/genders_en.nt.bz2'
dbpedia_gender_path = '{0}/genders_en.nt.bz2'.format(target)
if not os.path.exists(dbpedia_gender_path):
    subprocess.call(
        ['/usr/bin/wget', dbpedia_gender, '-O', dbpedia_gender_path],
        stdout=None,
        stderr=None,
    )
In [8]:
# Wikipedia gender list, see http://www.davidbamman.com/?p=12
# Original note: "in previous versions, this was a text file. now it's a
# bzipped file with n-triplets."
# NOTE(review): that note appears to describe the DBpedia genders dump
# (genders_en.nt.bz2) rather than this plain-text file -- confirm.
wikipedia_gender = 'http://www.ark.cs.cmu.edu/bio/data/wiki.genders.txt'
if not os.path.exists('{0}/wiki.genders.txt'.format(target)):
    # Download from wikipedia_gender. The previous code passed dbpedia_gender
    # here, which stored the DBpedia dump under the name wiki.genders.txt.
    subprocess.call(['/usr/bin/wget',
                     wikipedia_gender,
                     '-O', '{0}/wiki.genders.txt'.format(target)],
                    stdout=None, stderr=None)
In [11]:
# Long abstracts, fetched for the main language only.
# The existence check must use the same file name the download writes
# (.ttl.bz2). The previous check looked for .nt.bz2, which is never created,
# so the dump was downloaded again on every run.
if not os.path.exists('{0}/long_abstracts_{1}.ttl.bz2'.format(target, dbpedia_config.MAIN_LANGUAGE)):
    subprocess.call(['/usr/bin/wget',
                     '{1}/{0}/long_abstracts_{0}.ttl.bz2'.format(dbpedia_config.MAIN_LANGUAGE, db_uri),
                     '-O', '{0}/long_abstracts_{1}.ttl.bz2'.format(target, dbpedia_config.MAIN_LANGUAGE)],
                    stdout=None, stderr=None)
In [12]:
# Network (page link) data, fetched for the main language only.
# The remote file is requested with the .ttl.bz2 extension, matching the
# other core-i18n dumps downloaded from db_uri and the local file name.
# The previous URL asked for .nt.bz2.
if not os.path.exists('{0}/page_links_{1}.ttl.bz2'.format(target, dbpedia_config.MAIN_LANGUAGE)):
    subprocess.call(['/usr/bin/wget',
                     '{1}/{0}/page_links_{0}.ttl.bz2'.format(dbpedia_config.MAIN_LANGUAGE, db_uri),
                     '-O', '{0}/page_links_{1}.ttl.bz2'.format(target, dbpedia_config.MAIN_LANGUAGE)],
                    stdout=None, stderr=None)
In [ ]: